# Script to produce Table 1, Figure 1
# Gould et al. 2023 EHP

# 0. Libraries -------

library(tidyverse)
library(haven)
library(cowplot)
library(arsenal)
library(sf)

# Specify how we want summary tables to look (from arsenal package)
mycontrols  <- tableby.control(test=TRUE, total=FALSE,
                               numeric.stats=c("meansd", "medianq1q3"),
                               cat.stats=c("countpct"),
                               stats.labels=list(countpct='N (%)', meansd='Mean (SD)', medianq1q3="Median (IQR)", range = "Range"),
                               digits = 2L,
                               digits.pct = 0L,
                               digits.p = 3L,
                               pfootnote = T)

# 1. Data ---------

# Canton ID matching and population
canton_Ns <- read_csv("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Manuscript/Submission/LGH/Data/canton_id_universal.csv") %>%
  dplyr::select(-`...1`)


# Shapefiles
ecu_shp1 <- sf::st_read("~/Downloads/ecu_adm_inec_20190724_shp/ecu_admbnda_adm1_inec_20190724.shp")
ecu_shp2 <- sf::st_read("~/Downloads/ecu_adm_inec_20190724_shp/ecu_admbnda_adm2_inec_20190724.shp")
ecu_shp2_df_bind <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Code/May2020/OtherCovariates/period-avgs/ecu_shp2_df_bind.rds")
cf_province <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Code/DataProducts/CF_Predictions/province_cf_period_avg.rds")

# Main data ------
full_df <- read_rds("~/Downloads/gould_ehp_ecuador_cf_df.rds")

# 2. Analysis ------

# VIF for main linear model ------
car::vif(glm(ALRI5_5_plus ~
               offset(log(under5population)) + 
               cf10 + # % CF per 10 pp increase
               percent_rural_corrected_ratio + 
               electricidad_no + 
               materials_index + 
               elimina_servidas_red_pozo_letrina +
               Literate_women +
               InSchool_girls + 
               Idioma_indigena_self +
               pcv3_use + 
               vaccine_index + 
               ANC_use + 
               ANC_visits_median_use 
             ,
             data=full_df,
             family = quasipoisson()))

# Spatial clustering of residuals --------

residuals_main_model <- 
  residuals(alri5_glm_1_mod) %>% 
  bind_cols(
    full_df %>% 
      mutate(rownumber = row_number()) %>% 
      filter(rownumber %in% fixest::obs(alri5_glm_1_mod)) %>% 
      dplyr::select(canton_id_universal, period)) %>% 
  rename(residuals = `...1`)

ecu_shp2_df_bind1_residuals <- ecu_shp2_df_bind %>% 
  filter(canton_id_universal!="09_01") %>% 
  filter(canton_id_universal!="17_01")  %>% 
  left_join(residuals_main_model %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(residuals)) %>% 
              rename(residuals = `1990`), by ="ADM2_PCODE") %>% 
  filter(!is.na(residuals)) %>% 
  mutate(areanumber = row_number()) %>% 
  filter(areanumber!=63) %>% # these areas were spitting out errors so they are dropped
  filter(areanumber!=129) %>% 
  filter(areanumber!=137)

ecu_shp2_df_bind1_residuals_1 <- ecu_shp2_df_bind %>% 
  filter(canton_id_universal!="09_01") %>% 
  filter(canton_id_universal!="17_01")  %>% 
  left_join(residuals_main_model %>% 
              filter(period=="1990") %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(residuals)) %>% 
              rename(residuals = `1990`), by ="ADM2_PCODE") %>% 
  filter(!is.na(residuals)) %>% 
  mutate(areanumber = row_number()) %>% 
  filter(areanumber!=63) %>% # these areas were spitting out errors so they are dropped
  filter(areanumber!=129) %>% 
  filter(areanumber!=137)


ecu_shp2_df_bind1_residuals_2 <- 
  ecu_shp2_df_bind %>% 
  left_join(residuals_main_model %>% 
              filter(period=="2001") %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(residuals)) %>% 
              rename(residuals = `2001`), by ="ADM2_PCODE") %>% 
  filter(!is.na(residuals)) %>% 
  mutate(areanumber = row_number()) %>% 
  filter(areanumber!=63) %>% 
  filter(areanumber!=129) %>% 
  filter(areanumber!=137)

ecu_shp2_df_bind1_residuals_3 <- ecu_shp2_df_bind %>% 
  left_join(residuals_main_model %>% 
              filter(period=="2010") %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(residuals)) %>% 
              rename(residuals = `2010`), by ="ADM2_PCODE") %>% 
  filter(!is.na(residuals)) %>% 
  mutate(areanumber = row_number()) %>% 
  filter(areanumber!=63) %>% 
  filter(areanumber!=129) %>% 
  filter(areanumber!=137)

ecu_shp2_df_bind1_residuals_4 <- ecu_shp2_df_bind %>% 
  left_join(residuals_main_model %>% 
              filter(period=="2015-2019") %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(residuals)) %>% 
              rename(residuals = `2015-2019`), by ="ADM2_PCODE") %>% 
  filter(!is.na(residuals)) %>% 
  mutate(areanumber = row_number()) %>% 
  filter(areanumber!=63) %>% 
  filter(areanumber!=129) %>% 
  filter(areanumber!=137)

neighbors <- spdep::poly2nb(ecu_shp2_df_bind1_residuals, queen=T)

list_weights <- spdep::nb2listw(neighbors, style="W", zero.policy=TRUE)

morans_i <- spdep::moran(
  ecu_shp2_df_bind1_residuals$residuals, 
  list_weights, 
  length(neighbors), 
  Szero(list_weights))[1]

# morans_i

montecarlo_moran <- 
  moran.mc(
    ecu_shp2_df_bind1_residuals$residuals,
    list_weights, 
    nsim=999, 
    alternative="greater")

montecarlo_moran

# plot(montecarlo_moran)

canton_residuals_map_1 <- 
  ggplot(data=ecu_shp2_df_bind1_residuals_1) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=residuals), 
          colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  # scale_fill_viridis_c() +
  scale_fill_viridis_c() +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "Residuals") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "right",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_residuals_map_2 <- ggplot(data=ecu_shp2_df_bind1_residuals_2) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=residuals), 
          colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  # scale_fill_viridis_c() +
  scale_fill_viridis_c() +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "Residuals") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "right",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_residuals_map_3 <- ggplot(data=ecu_shp2_df_bind1_residuals_3) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=residuals), 
          colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  # scale_fill_viridis_c() +
  scale_fill_viridis_c() +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "Residuals") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "right",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_residuals_map_4 <- ggplot(data=ecu_shp2_df_bind1_residuals_4) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=residuals), 
          colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  # scale_fill_viridis_c() +
  scale_fill_viridis_c() +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "Residuals") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "right",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_residuals_map_combined <-
  plot_grid(
    canton_residuals_map_1,
    canton_residuals_map_2,
    canton_residuals_map_3,
    canton_residuals_map_4,
    labels = c("1988-1992", "1999-2003", "2008-2012", "2015-2019")
    
  )

canton_residuals_map_combined

# 4. Main linear models --------

# Empty model -------

alri5_glm_empty_mod <- fixest::feglm(ALRI5_5_plus ~
                                       offset(log(under5population)) + 
                                       
                                       cf10 |
                                       
                                       as.factor(canton_id_universal) +
                                       as.factor(period)
                                     ,
                                     data=full_df,
                                     family = quasipoisson())

tidy(alri5_glm_empty_mod, conf.int = T)

# Model 1 -------

alri5_glm_1_mod <- fixest::feglm(ALRI5_5_plus ~
                                   offset(log(under5population)) + 
                                   
                                   cf10 +
                                   
                                   percent_rural_corrected_ratio + 
                                   electricidad_no + 
                                   
                                   # material_techo_nicer_all + 
                                   # material_techo_hormigon_losa_cemento_use + 
                                   # material_pared_hormigon_bloque_ladrillo + # close enough
                                   # material_piso_nicer + 
                                   # material_piso_tierra + 
                                   materials_index + 
                                   
                                   # obtiene_agua_redpublica_tuberia_adentro_use + 
                                   # servicio_higenico_redpublica_use + 
                                   # elimina_servidas_red_pozo + 
                                   elimina_servidas_red_pozo_letrina + 
                                   # servicio_ducha_excluso + 
                                   # elimina_basura_service + 
                                   # hygiene_index + 
                                   
                                   Literate_women + 
                                   InSchool_girls + 
                                   Idioma_indigena_self + 
                                   
                                   # BCG_use + 
                                   # DPT3_use + 
                                   # SAR_use + 
                                   # Polio_use + 
                                   pcv3_use + 
                                   vaccine_index + 
                                   
                                   # mage_mean +
                                   ANC_use + # pretty much
                                   ANC_visits_median_use # +
                                 |
                                   
                                   as.factor(canton_id_universal) +
                                   as.factor(period),
                                 data=full_df,
                                 family = quasipoisson())

tidy(alri5_glm_1_mod,conf.int = T)


# Model 2 --------

alri5_glm_2_mod <- fixest::feglm(ALRI5_5_plus ~
                                   offset(log(under5population)) + 
                                   
                                   cf10 +
                                   
                                   percent_rural_corrected_ratio + 
                                   electricidad_no + 
                                   
                                   # material_techo_nicer_all + 
                                   # material_techo_hormigon_losa_cemento_use + 
                                   # material_pared_hormigon_bloque_ladrillo + # close enough
                                   # material_piso_nicer + 
                                   # material_piso_tierra + 
                                   materials_index + 
                                   
                                   # obtiene_agua_redpublica_tuberia_adentro_use + 
                                   # servicio_higenico_redpublica_use + 
                                   # elimina_servidas_red_pozo + 
                                   elimina_servidas_red_pozo_letrina + 
                                   # servicio_ducha_excluso + 
                                   # elimina_basura_service + 
                                   # hygiene_index + 
                                   
                                   # Literate_women + 
                                   InSchool_girls + 
                                   Idioma_indigena_self + 
                                   
                                   # BCG_use + 
                                   # DPT3_use + 
                                   # SAR_use + 
                                   # Polio_use + 
                                   pcv3_use + 
                                   vaccine_index + 
                                   
                                   ANC_use + # pretty much 
                                   ANC_visits_median_use |
                                   
                                   as.factor(canton_id_universal) +
                                   as.factor(period)
                                 
                                 ,
                                 data=full_df,
                                 family = quasipoisson())

# Model 3 --------
alri5_glm_3_mod <- fixest::feglm(ALRI5_5_plus ~
                                   
                                   offset(log(under5population)) + 
                                   
                                   cf10 +
                                   
                                   percent_rural_corrected_ratio + 
                                   electricidad_no + 
                                   
                                   # material_techo_nicer_all + 
                                   # material_techo_hormigon_losa_cemento_use + 
                                   # material_pared_hormigon_bloque_ladrillo + # close enough
                                   # material_piso_nicer + 
                                   # material_piso_tierra + 
                                   materials_index + 
                                   
                                   obtiene_agua_redpublica_tuberia_adentro_use + 
                                   # servicio_higenico_redpublica_use + 
                                   # elimina_servidas_red_pozo + 
                                   # elimina_servidas_red_pozo_letrina + 
                                   # servicio_ducha_excluso + 
                                   # elimina_basura_service + 
                                   # hygiene_index + 
                                   
                                   Literate_women + 
                                   InSchool_girls + 
                                   Idioma_indigena_self + 
                                   
                                   # BCG_use + 
                                   # DPT3_use + 
                                   # SAR_use + 
                                   # Polio_use + 
                                   pcv3_use + 
                                   vaccine_index + 
                                   
                                   ANC_use + # pretty much 
                                   ANC_visits_median_use |
                                   
                                   as.factor(canton_id_universal) +
                                   as.factor(period)
                                 
                                 ,
                                 data=full_df,
                                 family = quasipoisson())


# Model 4 --------
alri5_glm_4_mod <- fixest::feglm(ALRI5_5_plus ~
                                   offset(log(under5population)) + 
                                   
                                   cf10 +
                                   
                                   # material_techo_nicer_all + 
                                   # material_techo_hormigon_losa_cemento_use + 
                                   # material_pared_hormigon_bloque_ladrillo + # close enough
                                   # material_piso_nicer + 
                                   # material_piso_tierra + 
                                   materials_index + 
                                   
                                   obtiene_agua_redpublica_tuberia_adentro_use + 
                                   # servicio_higenico_redpublica_use + 
                                   # elimina_servidas_red_pozo + 
                                   # elimina_servidas_red_pozo_letrina + 
                                   # servicio_ducha_excluso + 
                                   # elimina_basura_service + 
                                   # hygiene_index + 
                                   
                                   # Literate_women + 
                                   InSchool_girls + 
                                   Idioma_indigena_self + 
                                   
                                   # BCG_use + 
                                   # DPT3_use + 
                                   # SAR_use + 
                                   # Polio_use + 
                                   pcv3_use + 
                                   vaccine_index + 
                                   
                                   ANC_use + 
                                   ANC_visits_median_use |
                                   
                                   as.factor(canton_id_universal) +
                                   as.factor(period),
                                 data=full_df,
                                 family = quasipoisson())


# Model 5 --------
alri5_glm_5_mod <- fixest::feglm(ALRI5_5_plus ~
                                   offset(log(under5population)) + 
                                   
                                   cf10 +
                                   
                                   materials_index + 
                                   
                                   obtiene_agua_redpublica_tuberia_adentro_use +
                                   
                                   InSchool_girls + 
                                   Idioma_indigena_self + 
                                   
                                   pcv3_use + 
                                   vaccine_index + 
                                   
                                   mage_mean + 
                                   ANC_use + 
                                   ANC_visits_median_use |
                                   
                                   as.factor(canton_id_universal) +
                                   as.factor(period),
                                 data=full_df,
                                 family = quasipoisson())


# 5. Assimilate estimates for %CF ----------

alri5_glm_estimates <- tidy(alri5_glm_empty_mod)[2,] %>% 
  mutate(model = "Model 0: Empty model") %>% 
  bind_rows(tidy(alri5_glm_1_mod)[2,] %>% 
              mutate(model = "Model 1:\nPreferred specification")) %>% 
  bind_rows(tidy(alri5_glm_2_mod)[1,] %>% 
              mutate(model = "Model 2:\nModel 1 minus adult\nfemale literacy")) %>% 
  bind_rows(tidy(alri5_glm_3_mod)[1,] %>% 
              mutate(model = "Model 3:\nModel 1 replace solid waste removal\nwith piped water")) %>% 
  bind_rows(tidy(alri5_glm_4_mod)[1,] %>% 
              mutate(model = "Model 4:\nModel 3 minus adult\nfemale literacy")) %>% 
  bind_rows(tidy(alri5_glm_5_mod)[1,] %>% 
              mutate(model = "Model 5:\nModel 1 plus mean\nage of mother")) %>% 
  mutate(
    irr = exp(estimate),
    int66_hi = exp(estimate + (0.412*std.error)),
    int66_low = exp(estimate - (0.412*std.error)),
    int80_hi = exp(estimate + (0.842*std.error)),
    int80_low = exp(estimate - (0.842*std.error)),
    int95_hi = exp(estimate + (1.95996*std.error)),
    int95_low = exp(estimate - (1.95996*std.error))) 

# 5.1 Make figure ---------

alternative_specifications_glm <- ggplot() +
  geom_interval(data=alri5_glm_estimates, 
                aes(x=model, y=irr, ymin=int95_low, ymax=int95_hi, color=model), alpha=0.50, size = 2.5) +
  geom_interval(data=alri5_glm_estimates, 
                aes(x=model, y=irr, ymin=int80_low, ymax=int80_hi, color=model), alpha=0.66, size = 3) +
  geom_interval(data=alri5_glm_estimates, 
                aes(x=model, y=irr, ymin=int66_low, ymax=int66_hi, color=model), alpha=0.95, size = 3.5) +  
  geom_point(data=alri5_glm_estimates, 
             aes(x=model, y=irr), size=2, shape=18, color="white") + 
  ggsci::scale_color_lancet() + 
  
  ggtitle("Potential confounders specification plot",
          subtitle = "Quasi-Poisson generalized linear models") + 
  ylab("Mortality rate ratio (66%, 80%, 95% CI)\nper 10 p.p. increase in %CF") + xlab("") + 
  
  geom_hline(yintercept=1) + 
  scale_y_continuous(breaks=c(0.6, 0.7, 0.8, 0.9, 1.0)) +
  # coord_cartesian(ylim=c(-1, 0)) + 
  # scale_color_viridis_d() + 
  theme_bw() + 
  theme(axis.text.y = element_text(size=12, color="black"),
        axis.text.x = element_text(size=10, color="black"),
        axis.title = element_text(size=13, face="bold"),
        plot.title = element_text(size=14, face="bold"),
        legend.position="none",
        legend.title = element_blank(),
        legend.text=element_text(size=10, color="black"))

alternative_specifications_glm

glm_estimate_fig_legend <- ggplot() +
  geom_interval(data=alri5_glm_estimates %>% filter(model=="Model 0: Empty model"), 
                aes(x=model, y=irr, ymin=int95_low, ymax=int95_hi), alpha=0.50) +
  geom_interval(data=alri5_glm_estimates  %>% filter(model=="Model 0: Empty model"), 
                aes(x=model, y=irr, ymin=int80_low, ymax=int80_hi), alpha=0.66) +
  geom_interval(data=alri5_glm_estimates  %>% filter(model=="Model 0: Empty model"), 
                aes(x=model, y=irr, ymin=int66_low, ymax=int66_hi,), alpha=0.95) +  
  geom_point(data=alri5_glm_estimates  %>% filter(model=="Model 0: Empty model"), 
             aes(x=model, y=irr), size=2, shape=18, color="white") + 
  
  annotate("text", x=1.35, y=0.7899, label = "IRR estimate") +
  annotate("text", x=1.35, y=0.796, label = "66% CI") +
  annotate("text", x=1.35, y=0.805, label = "80% CI") +
  annotate("text", x=1.35, y=0.828, label = "95% CI") +
  
  geom_segment(aes(x=1.20, xend=1.05, y=0.7899, yend=0.7899), 
               arrow = arrow(type = "closed", length = unit(0.5, "lines"), 
                             angle = 15), color="black") +
  geom_segment(aes(x=1.20, xend=1.05, y=0.796, yend=0.796), 
               arrow = arrow(type = "closed", length = unit(0.5, "lines"), 
                             angle = 15), color="black") +
  geom_segment(aes(x=1.20, xend=1.05, y=0.805, yend=0.805), 
               arrow = arrow(type = "closed", length = unit(0.5, "lines"), 
                             angle = 15), color="black") +
  geom_segment(aes(x=1.20, xend=1.05, y=0.7899, yend=0.7899), 
               arrow = arrow(type = "closed", length = unit(0.5, "lines"), 
                             angle = 15), color="black") +
  geom_segment(aes(x=1.20, xend=1.05, y=0.796, yend=0.796), 
               arrow = arrow(type = "closed", length = unit(0.5, "lines"), 
                             angle = 15), color="black") +
  geom_segment(aes(x=1.20, xend=1.05, y=0.828, yend=0.828), 
               arrow = arrow(type = "closed", length = unit(0.5, "lines"), 
                             angle = 15), color="black") +
  
  scale_color_viridis_d() + 
  theme_void() + 
  theme(axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.title = element_blank(),
        plot.title = element_blank(),
        plot.subtitle = element_blank(), 
        panel.background = element_rect(color="black"),
        legend.position="none",
        legend.title = element_blank(),
        legend.text=element_blank())



# 6. Model in GAMs --------

# 6.1 k=3 -------

# Empty model -------

alri5_gam_empty_mod <- gam(ALRI5_5_plus ~
                             offset(log(under5population)) + 
                             
                             s(cf,k=3, fx=T) +
                             
                             as.factor(canton_id_universal) +
                             as.factor(period)
                           ,
                           data=full_df,
                           family = quasipoisson())

# Model 1 -------

alri5_gam_1_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf) +
                         
                         percent_rural_corrected_ratio + 
                         electricidad_no + 
                         
                         # material_techo_nicer_all + 
                         # material_techo_hormigon_losa_cemento_use + 
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + 
                         # material_piso_tierra + 
                         materials_index + 
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + 
                         # servicio_higenico_redpublica_use + 
                         # elimina_servidas_red_pozo + 
                         elimina_servidas_red_pozo_letrina + 
                         # servicio_ducha_excluso + 
                         # elimina_basura_service + 
                         # hygiene_index + 
                         
                         Literate_women + 
                         InSchool_girls + 
                         Idioma_indigena_self + 
                         
                         # BCG_use + 
                         # DPT3_use + 
                         # SAR_use + 
                         # Polio_use + 
                         pcv3_use + 
                         vaccine_index + 
                         
                         # mage_mean +
                         # prenatal_care2 +
                         # prenatal_care_visits +
                         ANC_use + # pretty much
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       ,
                       data=full_df,
                       family = quasipoisson())

plot(alri5_gam_1_mod)

# Model 2 --------

alri5_gam_2_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + 
                         electricidad_no + 
                         
                         # material_techo_nicer_all + 
                         # material_techo_hormigon_losa_cemento_use + 
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + 
                         # material_piso_tierra + 
                         materials_index + 
                         
                         # obtiene_agua_redpublica_tuberia_adentro_use + 
                         # servicio_higenico_redpublica_use + 
                         # elimina_servidas_red_pozo + 
                         elimina_servidas_red_pozo_letrina + 
                         # servicio_ducha_excluso + 
                         # elimina_basura_service + 
                         # hygiene_index + 
                         
                         # Literate_women + 
                         InSchool_girls + 
                         Idioma_indigena_self + 
                         
                         # BCG_use + 
                         # DPT3_use + 
                         # SAR_use + 
                         # Polio_use + 
                         pcv3_use + 
                         vaccine_index + 
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use+
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())

# Model 3 --------
alri5_gam_3_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + 
                         electricidad_no + 
                         
                         # material_techo_nicer_all + 
                         # material_techo_hormigon_losa_cemento_use + 
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + 
                         # material_piso_tierra + 
                         materials_index + 
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + 
                         # servicio_higenico_redpublica_use + 
                         # elimina_servidas_red_pozo + 
                         # elimina_servidas_red_pozo_letrina + 
                         # servicio_ducha_excluso + 
                         # elimina_basura_service + 
                         # hygiene_index + 
                         
                         Literate_women + 
                         InSchool_girls + 
                         Idioma_indigena_self + 
                         
                         # BCG_use + 
                         # DPT3_use + 
                         # SAR_use + 
                         # Polio_use + 
                         pcv3_use + 
                         vaccine_index + 
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())

# Model 4 --------
alri5_gam_4_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + 
                         electricidad_no + 
                         
                         # material_techo_nicer_all + 
                         # material_techo_hormigon_losa_cemento_use + 
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + 
                         # material_piso_tierra + 
                         materials_index + 
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + 
                         # servicio_higenico_redpublica_use + 
                         # elimina_servidas_red_pozo + 
                         # elimina_servidas_red_pozo_letrina + 
                         # servicio_ducha_excluso + 
                         # elimina_basura_service + 
                         # hygiene_index + 
                         
                         # Literate_women + 
                         InSchool_girls + 
                         Idioma_indigena_self + 
                         
                         # BCG_use + 
                         # DPT3_use + 
                         # SAR_use + 
                         # Polio_use + 
                         pcv3_use + 
                         vaccine_index + 
                         
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())

# Model 5 --------
alri5_gam_5_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         
                         s(cf, k=4) +
                         
                         percent_rural_corrected_ratio + 
                         electricidad_no + 
                         
                         # material_techo_nicer_all + 
                         # material_techo_hormigon_losa_cemento_use + 
                         # material_pared_hormigon_bloque_ladrillo + # close enough
                         # material_piso_nicer + 
                         # material_piso_tierra + 
                         materials_index + 
                         
                         obtiene_agua_redpublica_tuberia_adentro_use + 
                         # servicio_higenico_redpublica_use + 
                         # elimina_servidas_red_pozo + 
                         # elimina_servidas_red_pozo_letrina + 
                         # servicio_ducha_excluso + 
                         # elimina_basura_service + 
                         # hygiene_index + 
                         
                         # Literate_women + 
                         InSchool_girls + 
                         Idioma_indigena_self + 
                         
                         # BCG_use + 
                         # DPT3_use + 
                         # SAR_use + 
                         # Polio_use + 
                         pcv3_use + 
                         vaccine_index + 
                         
                         mage_mean + 
                         ANC_use + # pretty much 
                         ANC_visits_median_use +
                         
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       
                       ,
                       data=full_df,
                       family = quasipoisson())


# Combine data from all plots -------

estimates_df <- estimate0 %>% 
  mutate(model = "Empty model") %>% 
  bind_rows(estimate1 %>% 
              mutate(model = "Model 1:\nPreferred specification")) %>% 
  bind_rows(estimate2 %>% 
              mutate(model = "Model 2:\nModel 1 minus adult\nfemale literacy")) %>% 
  bind_rows(estimate3 %>% 
              mutate(model = "Model 3:\n Model 1 replace solid waste\nremoval w/ piped water"))  %>% 
  bind_rows(estimate4 %>% 
              mutate(model = "Model 4:\n Model 3 minus adult\n female literacy")) 


preferred_spline_plot <-
  ggplot(estimates_df %>% 
           filter(model == "Model 1:\nPreferred specification"), aes(x=cf, y=fit, ymin=fit.low, ymax=fit.high, group=model, color=model)) + 
  # geom_segment(aes(x=0.6135,xend=0.6135, y=-2.25, yend=2.35), linetype=2, size=0.55, color="black") + 
  geom_ribbon(alpha=0.1, fill="dodgerblue3", color=NA) + 
  geom_line(size=0.8, color="dodgerblue4") + 
  
  
  # geom_segment(aes(x=0.30, xend=0.30, y=0.355, yend=0.405),
  #              color="black") +
  # geom_segment(aes(x=0.40, xend=0.40, y=0.355, yend=0.405),
  #              color="black") +
  # geom_segment(aes(x=0.31, xend=0.39, y=0.407, yend=0.407),
  #              color="black",
  #              arrow = arrow(length = unit(2, "mm"))) +
  # annotate("text", x=0.28, y=0.44, label = "30%", size=4) +
  # annotate("text", x=.42, y=0.44, label = "40%", size=4) +
# annotate("text", x=0.35, y=0.64, label = "IRR: 1.00\n(95% CI, 0.92-1.08)") +
# 
# geom_segment(aes(x=0.75, xend=0.75, y=0.04, yend=0.09),
#              color="black") +
# geom_segment(aes(x=.85, xend=.85, y=-0.16, yend=-0.11),
#              color="black") +
# geom_segment(aes(x=0.76, xend=0.84, y=0.10, yend=-0.05), 
#              color="black", 
#              arrow = arrow(length = unit(2, "mm"))) + 
# annotate("text", x=0.74, y=0.19, label = "75%", size=4) + 
# annotate("text", x=0.87, y=-0.06, label = "85%", size=4) + 
# annotate("text", x=0.87, y=0.22, label = "IRR: 0.82\n(95% CI, 0.79-0.85)") + 

theme_bw() + 
  coord_cartesian(ylim=c(-.7, 1)) +
  # geom_hline(yintercept = 0, linetype = 2, size=0.3) + 
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + 
  ylab("Response of under-5 LRI mortality rate") + 
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title.x=element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, family="Helvetica"),
        axis.title.y = element_text(size=11, family = "Helvetica", face="bold")) 


histogram <- ggplot(full_df, aes(x=cf)) +
  geom_histogram(fill="dodgerblue4", alpha=0.8, color="black", bins = 50) + 
  theme_bw() +
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + ylab("Count of cantons") +
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title = element_text(size=13, family = "Helvetica", face="bold"))  


preferred_spline_histogram_plot <- cowplot::plot_grid(
  preferred_spline_plot,
  histogram,
  align="hv",
  nrow=2,
  rel_heights = c(1, 0.5)
)

# preferred_spline_histogram_plot

splines_1to4_plot <- ggplot(estimates_df, 
                            aes(x=cf, y=fit, ymin=fit.low, ymax=fit.high, group=model, color=model)) + 
  geom_ribbon(alpha=0.05, fill="dodgerblue3", color=NA) +
  geom_line(size=0.7) +
  ggsci::scale_color_lancet() + 
  theme_bw() + 
  # geom_hline(yintercept = 0, linetype = 2, size=0.3) + 
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + 
  ylab("Response of under-5 LRI mortality rate") + 
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title.x=element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=14, family="Helvetica"),
        axis.title.y = element_text(size=13, family = "Helvetica", face="bold")) 


splines_1to4_histogram_plot <- cowplot::plot_grid(
  splines_1to4_plot,
  histogram,
  align="hv",
  nrow=2,
  rel_heights = c(1, 0.5)
)


splines_1to4_histogram_plot


alternative_specifications_fig_full <- cowplot::plot_grid(
  alternative_specifications_glm,
  splines_1to4_histogram_plot,
  nrow=2
  
)

alternative_specifications_fig_full

# 6.2 k=4 --------

# Empty model -------

alri5_gam_4_empty_mod <- gam(ALRI5_5_plus ~
                               offset(log(under5population)) + 
                               
                               s(cf, k=4) +
                               
                               as.factor(canton_id_universal) +
                               as.factor(period)
                             ,
                             data=full_df,
                             family = quasipoisson())

# Model 1 -------

alri5_gam_4_1_mod <- gam(ALRI5_5_plus ~
                           offset(log(under5population)) + 
                           
                           s(cf) +
                           
                           percent_rural_corrected_ratio + 
                           electricidad_no + 
                           
                           # material_techo_nicer_all + 
                           # material_techo_hormigon_losa_cemento_use + 
                           # material_pared_hormigon_bloque_ladrillo + # close enough
                           # material_piso_nicer + 
                           # material_piso_tierra + 
                           materials_index + 
                           
                           # obtiene_agua_redpublica_tuberia_adentro_use + 
                           # servicio_higenico_redpublica_use + 
                           # elimina_servidas_red_pozo + 
                           elimina_servidas_red_pozo_letrina + 
                           # servicio_ducha_excluso + 
                           # elimina_basura_service + 
                           # hygiene_index + 
                           
                           Literate_women + 
                           InSchool_girls + 
                           Idioma_indigena_self + 
                           
                           # BCG_use + 
                           # DPT3_use + 
                           # SAR_use + 
                           # Polio_use + 
                           pcv3_use + 
                           vaccine_index + 
                           
                           ANC_use + # pretty much 
                           ANC_visits_median_use +
                           
                           as.factor(canton_id_universal) +
                           as.factor(period)
                         ,
                         data=full_df,
                         family = quasipoisson())

# Model 2 --------

alri5_gam_4_2_mod <- gam(ALRI5_5_plus ~
                           offset(log(under5population)) + 
                           
                           s(cf, k=4) +
                           
                           percent_rural_corrected_ratio + 
                           electricidad_no + 
                           
                           # material_techo_nicer_all + 
                           # material_techo_hormigon_losa_cemento_use + 
                           # material_pared_hormigon_bloque_ladrillo + # close enough
                           # material_piso_nicer + 
                           # material_piso_tierra + 
                           materials_index + 
                           
                           # obtiene_agua_redpublica_tuberia_adentro_use + 
                           # servicio_higenico_redpublica_use + 
                           # elimina_servidas_red_pozo + 
                           elimina_servidas_red_pozo_letrina + 
                           # servicio_ducha_excluso + 
                           # elimina_basura_service + 
                           # hygiene_index + 
                           
                           # Literate_women + 
                           InSchool_girls + 
                           Idioma_indigena_self + 
                           
                           # BCG_use + 
                           # DPT3_use + 
                           # SAR_use + 
                           # Polio_use + 
                           pcv3_use + 
                           vaccine_index + 
                           
                           ANC_use + # pretty much 
                           ANC_visits_median_use+
                           
                           as.factor(canton_id_universal) +
                           as.factor(period)
                         
                         ,
                         data=full_df,
                         family = quasipoisson())

# Model 3 --------
alri5_gam_4_3_mod <- gam(ALRI5_5_plus ~
                           offset(log(under5population)) + 
                           
                           s(cf, k=4) +
                           
                           percent_rural_corrected_ratio + 
                           electricidad_no + 
                           
                           # material_techo_nicer_all + 
                           # material_techo_hormigon_losa_cemento_use + 
                           # material_pared_hormigon_bloque_ladrillo + # close enough
                           # material_piso_nicer + 
                           # material_piso_tierra + 
                           materials_index + 
                           
                           obtiene_agua_redpublica_tuberia_adentro_use + 
                           # servicio_higenico_redpublica_use + 
                           # elimina_servidas_red_pozo + 
                           # elimina_servidas_red_pozo_letrina + 
                           # servicio_ducha_excluso + 
                           # elimina_basura_service + 
                           # hygiene_index + 
                           
                           Literate_women + 
                           InSchool_girls + 
                           Idioma_indigena_self + 
                           
                           # BCG_use + 
                           # DPT3_use + 
                           # SAR_use + 
                           # Polio_use + 
                           pcv3_use + 
                           vaccine_index + 
                           
                           ANC_use + # pretty much 
                           ANC_visits_median_use +
                           
                           as.factor(canton_id_universal) +
                           as.factor(period)
                         
                         ,
                         data=full_df,
                         family = quasipoisson())


# Model 4 --------
alri5_gam_4_4_mod <- gam(ALRI5_5_plus ~
                           offset(log(under5population)) + 
                           
                           s(cf, k=4) +
                           
                           # material_techo_nicer_all + 
                           # material_techo_hormigon_losa_cemento_use + 
                           # material_pared_hormigon_bloque_ladrillo + # close enough
                           # material_piso_nicer + 
                           # material_piso_tierra + 
                           materials_index + 
                           
                           obtiene_agua_redpublica_tuberia_adentro_use + 
                           # servicio_higenico_redpublica_use + 
                           # elimina_servidas_red_pozo + 
                           # elimina_servidas_red_pozo_letrina + 
                           # servicio_ducha_excluso + 
                           # elimina_basura_service + 
                           # hygiene_index + 
                           
                           # Literate_women + 
                           InSchool_girls + 
                           Idioma_indigena_self + 
                           
                           # BCG_use + 
                           # DPT3_use + 
                           # SAR_use + 
                           # Polio_use + 
                           pcv3_use + 
                           vaccine_index + 
                           
                           ANC_use + # pretty much 
                           ANC_visits_median_use +
                           
                           as.factor(canton_id_universal) +
                           as.factor(period)
                         ,
                         data=full_df,
                         family = quasipoisson())

# 6.2.2 Grab spline information for all plots -------
p0_4 <- plot(alri5_gam_4_empty_mod, shade=TRUE, select=1)

estimate0_4 <- unlist(p0_4[[1]]$fit) %>% 
  bind_cols(p0_4[[1]]$se) %>% 
  bind_cols(p0_4[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)

p1_4 <- plot(alri5_gam_4_1_mod, shade=TRUE, select=1)

estimate1_4 <- unlist(p1_4[[1]]$fit) %>% 
  bind_cols(p1_4[[1]]$se) %>% 
  bind_cols(p1_4[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)

p2_4 <- plot(alri5_gam_4_2_mod, shade=TRUE, select=1)

estimate2_4 <- unlist(p2_4[[1]]$fit) %>% 
  bind_cols(p2_4[[1]]$se) %>% 
  bind_cols(p2_4[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)

p3_4 <- plot(alri5_gam_4_3_mod, shade=TRUE, select=1)

estimate3_4 <- unlist(p3_4[[1]]$fit) %>% 
  bind_cols(p3_4[[1]]$se) %>% 
  bind_cols(p3_4[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)

p4_4 <- plot(alri5_gam_4_4_mod, shade=TRUE, select=1)

estimate4_4 <- unlist(p4_4[[1]]$fit) %>% 
  bind_cols(p4_4[[1]]$se) %>% 
  bind_cols(p4_4[[1]]$x) %>% 
  rename(cf = `...3`,
         fit = `...1`,
         se = `...2`) %>% 
  mutate(fit.high = fit + se,
         fit.low = fit - se)


# 6.2.3 Combine data from all plots -------

estimates_4_df <- estimate0_4 %>% 
  mutate(model = "Empty model") %>% 
  bind_rows(estimate1_4 %>% 
              mutate(model = "Model 1:\nPreferred specification")) %>% 
  bind_rows(estimate2_4 %>% 
              mutate(model = "Model 2:\nModel 1 minus adult female literacy")) %>% 
  bind_rows(estimate3_4 %>% 
              mutate(model = "Model 3:\n Model 1 replace solid waste removal w/ piped water"))

splines_4_1to4_plot <- ggplot(estimates_4_df, aes(x=cf, y=fit, ymin=fit.low, ymax=fit.high, group=model, color=model)) + 
  geom_ribbon(alpha=0.1, fill="dodgerblue3", color=NA) + 
  geom_line(size=0.8) + 
  scale_color_viridis_d() + 
  theme_bw() + 
  # geom_hline(yintercept = 0, linetype = 2, size=0.3) + 
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + 
  ylab("Response of under-5 LRI mortality rate") + 
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title.x=element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, family="Helvetica"),
        axis.title.y = element_text(size=11, family = "Helvetica", face="bold")) 





# 6.3 Full plot -----

splines_all_1to4_histogram_plot <- cowplot::plot_grid(
  splines_1to4_plot,
  splines_4_1to4_plot,
  histogram,
  align="hv",
  nrow=3,
  rel_heights = c(1, 1, 0.5)
)

splines_all_1to4_histogram_plot



# 7. Segmented regressions --------

# Can't use feglm for these regressions. 

glm0_segment <- segmented::segmented(alri5_glm_empty_mod,
                                     seg.Z = ~ cf10,
                                     psi=c(5.5))

# summary(glm0_segment)
# segmented::slope(glm0_segment)

glm1_segment <- segmented::segmented(alri5_glm_1_mod,
                                     seg.Z = ~ cf10,
                                     psi=c(5))

glm1_segment
summary(glm1_segment)
segmented::slope(glm1_segment)

glm2_segment <- segmented::segmented(alri5_glm_2_mod,
                                     seg.Z = ~ cf10,
                                     psi=c(5))

glm2_segment
summary(glm2_segment)
segmented::slope(glm2_segment)

glm3_segment <- segmented::segmented(alri5_glm_3_mod,
                                     seg.Z = ~ cf10,
                                     psi=c(5))

glm3_segment
summary(glm3_segment)
segmented::slope(glm3_segment)

glm4_segment <- segmented::segmented(alri5_glm_4_mod,
                                     seg.Z = ~ cf10,
                                     psi=c(5))

glm4_segment
summary(glm4_segment)
segmented::slope(glm4_segment)


glm5_segment <- segmented::segmented(alri5_glm_5_mod,
                                     seg.Z = ~ cf10,
                                     psi=c(5))

glm5_segment
summary(glm5_segment)
segmented::slope(glm5_segment)



# 10. Estimate health benefits ----------

predict_canton_accrual <- matrix(NA, length(unique(full_df$canton_id_universal))*30, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 30)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(1990:2019, 169)) %>% 
  mutate(period = as.character(period))

predict_canton_accrual_full_int <-predict_canton_accrual %>% 
  left_join(full_df %>% 
              mutate(period = ifelse(period=="2015-2019", "2015", period)) %>% 
              dplyr::select(canton_id_universal, period,
                            under5population, cf, cf_1990,
                            percent_rural_corrected_ratio, electricidad_no,
                            materials_index, elimina_servidas_red_pozo_letrina,
                            Literate_women, InSchool_girls, Idioma_indigena_self,
                            pcv3_use, vaccine_index, #mage_mean, 
                            ANC_use, ANC_visits_median_use,
                            obtiene_agua_redpublica_tuberia_adentro_use), by=c("canton_id_universal", "period")) %>% 
  mutate(period = as.numeric(period)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(under5population = zoo::na.approx(under5population, na.rm=F),
         cf = zoo::na.approx(cf, na.rm=F),
         cf_1990 = zoo::na.approx(cf_1990, na.rm=F),
         percent_rural_corrected_ratio = zoo::na.approx(percent_rural_corrected_ratio, na.rm=F),
         electricidad_no = zoo::na.approx(electricidad_no, na.rm=F),
         materials_index = zoo::na.approx(materials_index, na.rm=F),
         elimina_servidas_red_pozo_letrina = zoo::na.approx(elimina_servidas_red_pozo_letrina, na.rm=F),
         Literate_women = zoo::na.approx(Literate_women, na.rm=F),
         InSchool_girls = zoo::na.approx(InSchool_girls, na.rm=F),
         Idioma_indigena_self = zoo::na.approx(Idioma_indigena_self, na.rm=F),
         pcv3_use = zoo::na.approx(pcv3_use, na.rm=F),
         vaccine_index = zoo::na.approx(vaccine_index, na.rm=F),
         # mage_mean = zoo::na.approx(mage_mean, na.rm=F),
         ANC_use = zoo::na.approx(ANC_use, na.rm=F),
         ANC_visits_median_use = zoo::na.approx(ANC_visits_median_use, na.rm=F),
         obtiene_agua_redpublica_tuberia_adentro_use = zoo::na.approx(obtiene_agua_redpublica_tuberia_adentro_use, na.rm=F)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(under5population = zoo::na.locf0(under5population),
         cf = zoo::na.locf0(cf),
         cf_1990 = zoo::na.locf0(cf_1990),
         percent_rural_corrected_ratio = zoo::na.locf0(percent_rural_corrected_ratio),
         electricidad_no = zoo::na.locf0(electricidad_no),
         materials_index = zoo::na.locf0(materials_index),
         elimina_servidas_red_pozo_letrina = zoo::na.locf0(elimina_servidas_red_pozo_letrina),
         Literate_women = zoo::na.locf0(Literate_women),
         InSchool_girls = zoo::na.locf0(InSchool_girls),
         Idioma_indigena_self = zoo::na.locf0(Idioma_indigena_self),
         pcv3_use = zoo::na.locf0(pcv3_use),
         vaccine_index = zoo::na.locf0(vaccine_index),
         # mage_mean = zoo::na.locf0(mage_mean),
         ANC_use = zoo::na.locf0(ANC_use),
         ANC_visits_median_use = zoo::na.locf0(ANC_visits_median_use),
         obtiene_agua_redpublica_tuberia_adentro_use = zoo::na.locf0(obtiene_agua_redpublica_tuberia_adentro_use))

predict_canton_accrual_full_int_1990 <- predict_canton_accrual_full_int %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990) %>%
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))

predict_canton_accrual_full_int1 <- predict_canton_accrual_full_int %>% 
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))

predict_canton_accrual_full_int1_half <- predict_canton_accrual_full_int %>% 
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))),
         cf = 0.5*cf)


predict_canton_accrual_full_int_1990_half <- predict_canton_accrual_full_int %>% 
  dplyr::select(-cf) %>% 
  mutate(cf = cf_1990*0.5) %>%
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))



# Counterfactual: no changes in %CF or covariates, present canton-year population --------

predict_canton_accrual_full_int_counterfactual <- predict_canton_accrual_full_int1 %>% 
  mutate(percent_rural_corrected_ratio_1990 = ifelse(period=="1990", percent_rural_corrected_ratio, NA),
         electricidad_no_1990 = ifelse(period=="1990", electricidad_no, NA),
         materials_index_1990 = ifelse(period=="1990", materials_index, NA),
         elimina_servidas_red_pozo_letrina_1990 = ifelse(period=="1990", elimina_servidas_red_pozo_letrina, NA),
         Literate_women_1990 = ifelse(period=="1990", Literate_women, NA),
         InSchool_girls_1990 = ifelse(period=="1990", InSchool_girls, NA),
         Idioma_indigena_self_1990 = ifelse(period=="1990", Idioma_indigena_self, NA),
         pcv3_use_1990 = ifelse(period=="1990", pcv3_use, NA),
         vaccine_index_1990 = ifelse(period=="1990", vaccine_index, NA),
         # mage_mean_1990 = ifelse(period=="1990", mage_mean, NA),
         ANC_use_1990 = ifelse(period=="1990", ANC_use, NA),
         ANC_visits_median_use_1990 = ifelse(period=="1990", ANC_visits_median_use, NA),
         obtiene_agua_redpublica_tuberia_adentro_use_1990 = ifelse(period=="1990", obtiene_agua_redpublica_tuberia_adentro_use, NA)) %>% 
  mutate(percent_rural_corrected_ratio_1990 = min(percent_rural_corrected_ratio_1990, na.rm=T),
         electricidad_no_1990 = min(electricidad_no_1990, na.rm=T),
         materials_index_1990 = min(materials_index_1990, na.rm=T),
         elimina_servidas_red_pozo_letrina_1990 = min(elimina_servidas_red_pozo_letrina_1990, na.rm=T),
         Literate_women_1990 = min(Literate_women_1990, na.rm=T),
         InSchool_girls_1990 = min(InSchool_girls_1990, na.rm=T),
         Idioma_indigena_self_1990 = min(Idioma_indigena_self_1990, na.rm=T),
         pcv3_use_1990 = min(pcv3_use_1990, na.rm=T),
         vaccine_index_1990 = min(vaccine_index_1990, na.rm=T),
         # mage_mean_1990 = min(mage_mean_1990, na.rm=T),
         ANC_use_1990 = min(ANC_use_1990, na.rm=T),
         ANC_visits_median_use_1990 = min(ANC_visits_median_use_1990, na.rm=T),
         obtiene_agua_redpublica_tuberia_adentro_use_1990 = min(obtiene_agua_redpublica_tuberia_adentro_use_1990, na.rm=T)) %>% 
  dplyr::select(-percent_rural_corrected_ratio, -cf,
                -electricidad_no, -materials_index,
                -elimina_servidas_red_pozo_letrina, -Literate_women,
                -InSchool_girls, -Idioma_indigena_self,
                -pcv3_use, -vaccine_index,
                #-mage_mean, 
                -ANC_use, -ANC_visits_median_use,
                -obtiene_agua_redpublica_tuberia_adentro_use) %>% 
  dplyr::rename(percent_rural_corrected_ratio = percent_rural_corrected_ratio_1990, 
                cf = cf_1990,
                electricidad_no = electricidad_no_1990, 
                materials_index = materials_index_1990,
                elimina_servidas_red_pozo_letrina = elimina_servidas_red_pozo_letrina_1990, 
                Literate_women = Literate_women_1990,
                InSchool_girls = InSchool_girls_1990, 
                Idioma_indigena_self = Idioma_indigena_self_1990,
                pcv3_use = pcv3_use_1990, 
                vaccine_index = vaccine_index_1990,
                # mage_mean = mage_mean_1990,
                ANC_use = ANC_use_1990, 
                ANC_visits_median_use = ANC_visits_median_use_1990,
                obtiene_agua_redpublica_tuberia_adentro_use = obtiene_agua_redpublica_tuberia_adentro_use_1990) %>% 
  dplyr::select(-period) %>% 
  mutate(period = "1990")


# 10.1 Main model ---------

alri5_gam_1_mod

# new approach -------

predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_1_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_1_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_1 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_1$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_highhigh, na.rm=T)

predicted_canton_accrue_1 %>% 
  mutate(accrual_period = ifelse(year>=1990 & year<=2001, "1990 to 2001",
                                 ifelse(year>2001 & year<=2010, "2001 to 2010",
                                        ifelse(year>2010, "2010 to 2019", NA)))) %>% 
  group_by(accrual_period) %>% 
  summarize(averted_int = sum(averted_alri5, na.rm=T),
            averted_int_low = sum(averted_alri5_lowlow, na.rm = T),
            averted_int_high = sum(averted_alri5_highhigh, na.rm = T))

predicted_canton_accrual_summary <-  predicted_canton_accrue_1 %>% 
  dplyr::group_by(canton_id_universal) %>% 
  dplyr::summarize(averted_int = sum(averted_alri5, na.rm=T),
                   averted_int_low = sum(averted_alri5_lowlow, na.rm = T),
                   averted_int_high = sum(averted_alri5_highhigh, na.rm = T))


table(predicted_canton_accrual_summary$averted_int<0)
table(predicted_canton_accrual_summary$averted_int_low>0)
table(predicted_canton_accrual_summary$averted_int_high<0)

# percent averted ------

predicted_canton_counterfactual <- predict(alri5_gam_1_mod, newdata = 
                                             predict_canton_accrual_full_int_counterfactual, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_counterfactual %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

sum(predicted_canton_counterfactual$fit) # 68,972
sum(predicted_canton_1990$fit) # 37,149
sum(predicted_canton_same$fit) # 29,805

(37149-29805)/(68972-29805)


# Figure summarizing percent averted ---------

predicted_percent_averted_df <- predicted_canton_counterfactual %>% 
  dplyr::select(canton_id_universal, year, fit,se.fit) %>% 
  mutate(model = "'Counterfactual' prediction:\n1990 %CF and covariates, present population") %>% 
  bind_rows(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal, year, fit,se.fit) %>% 
              mutate(model = "'No %CF increase' prediction:\n1990 %CF and present covariates")) %>% 
  bind_rows(predicted_canton_same %>% 
              dplyr::select(canton_id_universal, year, fit,se.fit) %>% 
              mutate(model="'True' prediction:\npresent %CF and covariates")) %>% 
  group_by(model, year) %>% 
  summarize(`under-5 LRI mortalities` = sum(fit, na.rm=T))

ggplot(predicted_percent_averted_df, aes(y=`under-5 LRI mortalities`, x=year, group=model, color=model)) + 
  geom_line(size=1.05) +
  theme_bw() +
  # scale_color_viridis_d() + 
  ggsci::scale_fill_lancet() + 
  scale_x_continuous() + 
  xlab("Year") + ylab("Yearly nationwide\nunder-5 LRI mortalities") +
  theme(axis.text = element_text(size=13, family="Helvetica"),
        legend.title = element_blank(),
        legend.position = "top",
        legend.text = element_text(size=13, family="Helvetica"),
        axis.title = element_text(size=14, family = "Helvetica")) 


# making % averted figure ---------

canton_predicted_percent_averted_df <- predicted_canton_same %>% 
  rename(same = fit) %>% 
  left_join(predicted_canton_counterfactual %>% 
              rename(counterfactual = fit), by = c("canton_id_universal", "year")) %>% 
  left_join(predicted_canton_1990 %>% 
              rename(ninety = fit), by = c("canton_id_universal",  "year")) %>% 
  mutate(diff_ninety_same = (ninety - same),
         diff_counter_same =  (counterfactual-same)) %>% 
  group_by(canton_id_universal) %>% 
  summarize(diff_ninety_same = sum(diff_ninety_same, na.rm=T),
            diff_counter_same = sum(diff_counter_same, na.rm=T)) %>% 
  mutate(percent_averted = diff_ninety_same / diff_counter_same)


sum(canton_predicted_percent_averted_df$counterfactual, na.rm=T)
sum(canton_predicted_percent_averted_df$ninety, na.rm=T)
sum(canton_predicted_percent_averted_df$same, na.rm=T)

sum(canton_predicted_percent_averted_df$diff, na.rm=T)



ecu_shp2_df_bind_averted <- ecu_shp2_df_bind %>% 
  left_join(canton_predicted_percent_averted_df %>% 
              dplyr::select(canton_id_universal, percent_averted) %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              dplyr::select(-canton_id_universal))

my_breaks <- c(-0.10, 0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5)
my_breaks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5)
my_labels = c("0%", "10%", "20%", "30%", "40%", "50%")
my_breaks_3 <- c(0, .05, 0.1, .15, 0.2,.25, 0.3)
my_labels_3 = c("0%", "5%", "10%", "15%", "20%", "25%", ">30%")

canton_averted_map <- ggplot(data=ecu_shp2_df_bind_averted) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=percent_averted), colour = "white", size=.005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = .25) + 
  scale_fill_viridis_c(
    breaks = my_breaks_3, labels = my_labels_3,
    limits=c(0, 0.3),
    oob=scales::squish
  ) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% of under-5 LRI mortality averted\nattributable to increased %CF") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())

canton_averted_map_galapagos <- ggplot(data=ecu_shp2_df_bind_averted %>% 
                                         mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                         filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=percent_averted), colour = "white", size=.005) +
  scale_fill_viridis_c(
    breaks = my_breaks_3, labels = my_labels_3,
    limits=c(0, 0.3),
    oob=scales::squish
  ) +
  labs(fill = "% of under-5 LRI mortality averted\nattributable to increased %CF") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())


canton_averted_map_full <- ggdraw() +
  draw_plot(canton_averted_map, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_averted_map_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)

averted_map_legend <- get_legend(
  ggplot(data=ecu_shp2_df_bind_averted) + 
    theme_classic() +
    geom_sf(aes(fill=percent_averted), colour = "white", size=0.005) +
    geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
    scale_fill_viridis_c(
      breaks = my_breaks_3, labels = my_labels_3,
      limits=c(0, 0.3),
      oob=scales::squish
    ) +
    theme(legend.title = element_blank(),
          legend.text = element_text(size=12),
          # legend.position = c(0.83, 0.20),
          legend.position = "top",
          legend.key.height = unit(0.6, 'cm'),
          legend.key.width = unit(1.8, 'cm'),
          plot.title.position = "plot",
          plot.title = element_text(size=24))
)

plot(averted_map_legend)

averted_map_full <- plot_grid(
  averted_map_legend,
  canton_averted_map_full,
  nrow=2, 
  rel_heights = c(0.3, 1)
)

cowplot::ggsave2("~/Desktop/Figure3_Gould_EHP_2022.pdf",
       averted_map_full,
       width = 150,
       height = 150,
       units = "mm",
       dpi = 600)

# ggsave("~/Dropbox/Ecuador National Health and LPG Analysis/Planning/Figures/canton_averted_map_legend.png",
#        canton_averted_map_legend,
#        width = 150,
#        height = 150,
#        units = "mm",
#        dpi = 250)


canton_predicted_percent_averted_df <- predicted_canton_counterfactual %>% 
  dplyr::select(canton_id_universal, year, fit,se.fit) %>% 
  mutate(model = "Counterfactual") %>% 
  bind_rows(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal, year, fit,se.fit) %>% 
              mutate(model = "'No %CF increase' prediction:\n1990 %CF and present covariates")) %>% 
  bind_rows(predicted_canton_same %>% 
              dplyr::select(canton_id_universal, year, fit,se.fit) %>% 
              mutate(model="True")) %>% 
  group_by(canton_id_universal, model, year) %>% 
  summarize(`under-5 LRI mortalities` = sum(fit, na.rm=T)) %>% 
  filter(model!="'No %CF increase' prediction:\n1990 %CF and present covariates") %>% 
  pivot_wider(id_cols=c(canton_id_universal, year), names_from = model, values_from=`under-5 LRI mortalities`) %>% 
  mutate(canton_year_averted = Counterfactual - True) %>% 
  group_by(canton_id_universal) %>% 
  summarize(Counterfactual = sum(Counterfactual, na.rm=T),
            True = sum(True, na.rm=T)) %>% 
  mutate(Averted_num = Counterfactual - True,
         Averted_percent = (Counterfactual - True) / (Counterfactual))

canton_predicted_percent_averted_df

sum(canton_predicted_percent_averted_df$Averted_num)


# Making a figure to summarize averted alri 5 ------

predicted_cantonperiod_accrual_combined <- predicted_canton_accrue_1 %>% 
  mutate(accrual_period = ifelse(year>=1990 & year<=2001, "1990 to 2001",
                                 ifelse(year>2001 & year<=2010, "2001 to 2010",
                                        ifelse(year>2010, "2010 to 2019", NA)))) %>% 
  filter(!is.na(predicted_alri5_same)) %>% 
  filter(year==2001 | year == 2010 | year==2017)

predicted_cantonperiod_accrual_combined %>% 
  group_by(canton_id_universal) %>% 
  summarize(predicted = sum(predicted_alri5_same, na.rm=T),
            averted = sum(averted_alri5, na.rm=T),
            averted_low = sum(averted_alri5_lowlow, na.rm=T),
            averted_high = sum(averted_alri5_highhigh, na.rm=T))

tmp <- predicted_cantonperiod_accrual_combined %>% 
  # filter(canton_id_universal=="01_01" | canton_id_universal=="01_02") %>% 
  dplyr::select(canton_id_universal, accrual_period, predicted_alri5_1990, predicted_alri5_same) %>% 
  distinct() 

colnames(tmp)

tmp_2001 <- tmp %>% 
  filter(accrual_period=="1990 to 2001") %>%
  dplyr::select(-accrual_period) %>% 
  pivot_longer(-canton_id_universal) %>% 
  mutate(x=plyr::mapvalues(name, 
                           c("predicted_alri5_1990", "predicted_alri5_same"),
                           c(0.1, 0.9))) %>% 
  mutate(x=as.numeric(x)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(value_lag = lag(value)) %>% 
  mutate(difference = value_lag - value) %>% 
  mutate(color = ifelse(difference>0, "dodgerblue4", 
                        ifelse(difference==0, "grey50", "tomato3"))) %>% 
  group_by(canton_id_universal) %>% 
  mutate(difference1 = ifelse(is.na(difference), lead(difference), difference),
         color1 = ifelse(is.na(color), lead(color), color))

tmp_2010 <- tmp %>% 
  filter(accrual_period=="2001 to 2010") %>%
  dplyr::select(-accrual_period) %>% 
  pivot_longer(-canton_id_universal) %>% 
  mutate(x=plyr::mapvalues(name, 
                           c("predicted_alri5_1990", "predicted_alri5_same"),
                           c(1.1, 1.9)))%>% 
  mutate(x=as.numeric(x))  %>% 
  group_by(canton_id_universal) %>% 
  mutate(value_lag = lag(value)) %>% 
  mutate(difference = value_lag - value) %>% 
  mutate(color = ifelse(difference>0, "dodgerblue4", 
                        ifelse(difference==0, "grey50", "tomato3"))) %>% 
  group_by(canton_id_universal) %>% 
  mutate(difference1 = ifelse(is.na(difference), lead(difference), difference),
         color1 = ifelse(is.na(color), lead(color), color))

tmp_20152019 <- tmp %>% 
  filter(accrual_period=="2010 to 2019") %>%
  dplyr::select(-accrual_period) %>% 
  pivot_longer(-canton_id_universal) %>% 
  mutate(x=plyr::mapvalues(name, 
                           c("predicted_alri5_1990", "predicted_alri5_same"),
                           c(2.1, 2.9))) %>% 
  mutate(x=as.numeric(x))  %>% 
  group_by(canton_id_universal) %>% 
  mutate(value_lag = lag(value)) %>% 
  mutate(difference = value_lag - value) %>% 
  mutate(color = ifelse(difference>0, "dodgerblue4", 
                        ifelse(difference==0, "grey50", "tomato3"))) %>% 
  group_by(canton_id_universal) %>% 
  mutate(difference1 = ifelse(is.na(difference), lead(difference), difference),
         color1 = ifelse(is.na(color), lead(color), color))

tmp_full <- tmp_2001 %>% 
  bind_rows(tmp_2010) %>% 
  bind_rows(tmp_20152019) %>% 
  mutate(x=as.numeric(x))

my_breaks_accrue <- c(-1, 0, 1, 3, 10, 20, 40)

yearly_differences_accrual <- ggplot() + 
  # geom_point(data=tmp_full, 
  #            aes(x=x, y=value)) +
  # geom_line(data=tmp_full,
  #           aes(x=x, y=value, group=canton_id_universal))  +
  geom_point(data=tmp_2001,
             aes(x=x, y=value, color=difference1), alpha=0.8) +
  geom_line(data=tmp_2001,
            aes(x=x, y=value, group=canton_id_universal, color=difference1), alpha=0.6) +
  geom_point(data=tmp_2010,
             aes(x=x, y=value, color=difference1), alpha=0.8) +
  geom_line(data=tmp_2010,
            aes(x=x, y=value, group=canton_id_universal, color=difference1), alpha=0.6) +
  geom_point(data=tmp_20152019,
             aes(x=x, y=value, color=difference1), alpha=0.8) +
  geom_line(data=tmp_20152019,
            aes(x=x, y=value, group=canton_id_universal, color=difference1), alpha=0.6) +
  # scale_color_viridis_c() + 
  scale_color_viridis_c(breaks = my_breaks_accrue, labels = my_breaks_accrue,
                        trans = scales::pseudo_log_trans(sigma = 0.5),
                        limits=c(-2, 40),
                        oob=scales::squish) +
  labs(color = "Averted under-5 LRI mortalities") + 
  theme_bw() +
  ylab("Predicted under-5\nLRI mortalities") +
  xlab("") +
  scale_x_continuous(breaks=c(0.5, 1.5, 2.5), 
                     labels=c("Difference in 2001\n(%CF 1990 - %CF 2001)", "Difference in 2010\n(%CF 1990 - %CF 2010)", "Difference in 2017\n(%CF 1990 - %CF 2017)")) +
  scale_y_sqrt(breaks=c(1, 5, 10, 25, 50, 100, 200, 300)) + 
  geom_label(aes(x=0.5, y=350), label="Yearly total: 328\n95% CI: 112 to 544") +
  geom_label(aes(x=1.5, y=350), label="Yearly total: 314\n95% CI: 89 to 538") +
  geom_label(aes(x=2.5, y=350), label="Yearly total: 244\n95% CI: 63 to 424") +
  theme(axis.text = element_text(size=14, color="black"),
        axis.title.y = element_text(size=15, color="black"),
        legend.title = element_text(size=14, color="black"),
        legend.position = "top",
        legend.text = element_text(size=13,color="black"),
        legend.key.width = unit(13,"mm"),
        legend.key.height = unit(7,"mm"),
        axis.title.x = element_blank())

# % averted

ecu_shp2_df_bind3 <- ecu_shp2_df_bind %>% 
  left_join(predicted_canton_accrual_summary %>% 
              dplyr::select(canton_id_universal, averted_int) %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              dplyr::select(-canton_id_universal))

my_breaks <- c(0, 5, 10, 25, 50, 100, 200, 450)

canton_accrue_map <- ggplot(data=ecu_shp2_df_bind3) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=averted_int), colour = alpha("white", 1 / 2), size=.2) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 1) + 
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 2),
                       limits=c(0, 450),
                       oob=scales::squish) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "under-5 LRI mortality rate") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())

canton_accrue_map_galapagos <- ggplot(data=ecu_shp2_df_bind3 %>% 
                                        mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                        filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=averted_int), colour = alpha("white", 1 / 2), size=.05) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks, 
                       trans = scales::pseudo_log_trans(sigma = 2),
                       limits=c(0, 450),
                       oob=scales::squish) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())


canton_accrue_map_full <- ggdraw() +
  draw_plot(canton_accrue_map, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_accrue_map_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)

accrue_map_legend <- get_legend(
  ggplot(data=ecu_shp2_df_bind3 %>% 
           mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
           filter(Galapagos == "EC20")) + 
    ggmap::theme_nothing() +
    geom_sf(aes(fill=averted_int), colour = alpha("white", 1 / 2), size=.05) +
    scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks, 
                         trans = scales::pseudo_log_trans(sigma = 2),
                         limits=c(0, 450),
                         oob=scales::squish) + 
    labs(fill = "Total averted under-5\nLRI mortalities") +
    theme(legend.title = element_text(size=18, face = "bold"),
          legend.text = element_text(size=16),
          # legend.position = c(0.83, 0.20),
          legend.position = "top",
          legend.key.height = unit(0.6, 'cm'),
          legend.key.width = unit(1.5, 'cm'),
          plot.title.position = "plot",
          plot.title = element_text(size=24))
)

plot(accrue_map_legend)

accrue_map_full <- plot_grid(
  accrue_map_legend,
  canton_accrue_map_full,
  nrow=2, 
  rel_heights = c(0.3, 1)
)

# ggsave("~/Dropbox/Ecuador National Health and LPG Analysis/Planning/Figures/canton_accrue_map_full_June1.png",
#        accrue_map_full,
#        width = 200,
#        height = 225,
#        units = "mm",
#        dpi = 250)



# # map -------

predicted_canton_accrual_summary

ecu_shp2_df_bind3 <- ecu_shp2_df_bind %>% 
  left_join(predicted_canton_accrual_summary %>% 
              dplyr::select(canton_id_universal, averted_int) %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              dplyr::select(-canton_id_universal))

my_breaks <- c(0, 5, 10, 25, 50, 100, 200, 450)

canton_accrue_map <- ggplot(data=ecu_shp2_df_bind3) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=averted_int), colour = alpha("white", 1 / 2), size=.2) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 1) + 
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 2),
                       limits=c(0, 450),
                       oob=scales::squish) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "under-5 LRI mortality rate") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())

canton_accrue_map_galapagos <- ggplot(data=ecu_shp2_df_bind3 %>% 
                                        mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                        filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=averted_int), colour = alpha("white", 1 / 2), size=.05) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks, 
                       trans = scales::pseudo_log_trans(sigma = 2),
                       limits=c(0, 450),
                       oob=scales::squish) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())


canton_accrue_map_full <- ggdraw() +
  draw_plot(canton_accrue_map, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_accrue_map_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)

accrue_map_legend <- get_legend(
  ggplot(data=ecu_shp2_df_bind3 %>% 
           mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
           filter(Galapagos == "EC20")) + 
    ggmap::theme_nothing() +
    geom_sf(aes(fill=averted_int), colour = alpha("white", 1 / 2), size=.05) +
    scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks, 
                         trans = scales::pseudo_log_trans(sigma = 2),
                         limits=c(0, 450),
                         oob=scales::squish) + 
    labs(fill = "Total averted under-5\nLRI mortalities") +
    theme(legend.title = element_text(size=18, face = "bold"),
          legend.text = element_text(size=16),
          # legend.position = c(0.83, 0.20),
          legend.position = "top",
          legend.key.height = unit(0.6, 'cm'),
          legend.key.width = unit(1.5, 'cm'),
          plot.title.position = "plot",
          plot.title = element_text(size=24))
)

plot(accrue_map_legend)

accrue_map_full <- plot_grid(
  accrue_map_legend,
  canton_accrue_map_full,
  nrow=2, 
  rel_heights = c(0.3, 1)
)

# ggsave("~/Dropbox/Ecuador National Health and LPG Analysis/Planning/Figures/canton_accrue_map_full_June1.png",
#        accrue_map_full,
#        width = 200,
#        height = 225,
#        units = "mm",
#        dpi = 250)


# combined accrual figure -------

combined_accrual_figure <- plot_grid(
  yearly_differences_accrual, 
  accrue_map_full,
  nrow=1,
  labels = c("A", "B"),
  label_size = 34,
  label_fontface = "bold",
  label_fontfamily = "Arial",
  align="hv",
  rel_widths = c(1, 0.8)
)

# ggsave("~/Dropbox/Ecuador National Health and LPG Analysis/Planning/Figures/combined_accrual_figure_June1.png",
#        combined_accrual_figure,
#        width = 400,
#        height = 240,
#        units = "mm",
#        dpi = 250)

# other -------

predicted_canton_half1 <- predicted_canton_half %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990_half %>% 
              dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_half_accrue = predicted_canton_half1 %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

sum(predicted_canton_half_accrue$averted_alri5, na.rm=T)
sum(predicted_canton_half_accrue$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_half_accrue$averted_alri5_highhigh, na.rm=T)

predicted_canton_accrual_2001 <- predict_canton_accrual %>%
  filter(period>=1990 & period<=2001) %>% 
  mutate(period = as.character(period)) %>% 
  left_join(predicted_canton_accrue_1, by = c("canton_id_universal", "period")) %>% 
  mutate(averted_int = ifelse(period=="1990", 0, averted_alri5),
         averted_int_low = ifelse(period=="1990", 0, averted_alri5_lowlow),
         averted_int_high = ifelse(period=="1990", 0, averted_alri5_highhigh)) %>% 
  dplyr::select(canton_id_universal, period, cf, averted_int, averted_alri5, everything()) %>% 
  group_by(canton_id_universal) %>% 
  mutate(averted_int = zoo::na.approx(averted_int, na.rm=F),
         averted_int_low = zoo::na.approx(averted_int_low, na.rm=F),
         averted_int_high = zoo::na.approx(averted_int_high, na.rm=F)) %>% 
  mutate(accrue_period = "1990 to 2001")

predicted_canton_accrual_2010 <- predict_canton_accrual %>%
  filter(period>=2001 & period<=2010) %>% 
  mutate(period = as.character(period)) %>% 
  left_join(predicted_canton_accrue_1, by = c("canton_id_universal", "period")) %>% 
  mutate(averted_int = ifelse(period=="2001", 0, averted_alri5),
         averted_int_low = ifelse(period=="2001", 0, averted_alri5_lowlow),
         averted_int_high = ifelse(period=="2001", 0, averted_alri5_highhigh)) %>% 
  dplyr::select(canton_id_universal, period, cf, averted_int, averted_alri5, everything()) %>% 
  group_by(canton_id_universal) %>% 
  mutate(averted_int = zoo::na.approx(averted_int, na.rm=F),
         averted_int_low = zoo::na.approx(averted_int_low, na.rm=F),
         averted_int_high = zoo::na.approx(averted_int_high, na.rm=F))%>% 
  mutate(accrue_period = "2001 to 2010")

predicted_canton_accrual_2015 <- predict_canton_accrual %>%
  filter(period>=2010 & period<=2019) %>% 
  mutate(period = ifelse(period==2015, "2015-2019", as.character(period))) %>% 
  left_join(predicted_canton_accrue_1, by = c("canton_id_universal", "period")) %>% 
  mutate(averted_int = ifelse(period=="2010", 0, averted_alri5),
         averted_int_low = ifelse(period=="2010", 0, averted_alri5_lowlow),
         averted_int_high = ifelse(period=="2010", 0, averted_alri5_highhigh)) %>% 
  dplyr::select(canton_id_universal, period, cf, averted_int, averted_alri5, everything()) %>% 
  group_by(canton_id_universal) %>% 
  mutate(averted_int = zoo::na.approx(averted_int, na.rm=F),
         averted_int_low = zoo::na.approx(averted_int_low, na.rm=F),
         averted_int_high = zoo::na.approx(averted_int_high, na.rm=F)) %>% 
  mutate(averted_int = zoo::na.locf0(averted_int),
         averted_int_low = zoo::na.locf0(averted_int_low),
         averted_int_high = zoo::na.locf0(averted_int_high)) %>% 
  mutate(accrue_period = "2010 to 2015-2019")



predicted_canton_accrual_combined <- predicted_canton_accrual_2001 %>% 
  bind_rows(predicted_canton_accrual_2010) %>% 
  bind_rows(predicted_canton_accrual_2015)

sum(predicted_canton_accrual_combined$averted_int)
sum(predicted_canton_accrual_combined$averted_int_low, na.rm=T)
sum(predicted_canton_accrual_combined$averted_int_high, na.rm=T)




# 10.2 Model 2 -------

alri5_gam_1_mod

# new approach -------

predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_2_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_2_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

predicted_canton_half <- predict(alri5_gam_2_mod, newdata = predict_canton_accrual_full_int1_half, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1_half %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990_half <- predict(alri5_gam_2_mod, newdata = predict_canton_accrual_full_int_1990_half, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990_half %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 


predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_2 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_1$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_highhigh, na.rm=T)

predicted_canton_accrue_1 %>% 
  mutate(accrual_period = ifelse(year>=1990 & year<=2001, "1990 to 2001",
                                 ifelse(year>2001 & year<=2010, "2001 to 2010",
                                        ifelse(year>2010, "2010 to 2019", NA)))) %>% 
  group_by(accrual_period) %>% 
  summarize(averted_int = sum(averted_alri5, na.rm=T),
            averted_int_low = sum(averted_alri5_lowlow, na.rm = T),
            averted_int_high = sum(averted_alri5_highhigh, na.rm = T))

predicted_canton_accrual_summary <-  predicted_canton_accrue_1 %>% 
  dplyr::group_by(canton_id_universal) %>% 
  dplyr::summarize(averted_int = sum(averted_alri5, na.rm=T),
                   averted_int_low = sum(averted_alri5_lowlow, na.rm = T),
                   averted_int_high = sum(averted_alri5_highhigh, na.rm = T))


table(predicted_canton_accrual_summary$averted_int_low>0)

# percent averted
predicted_canton_1990_baseline <- predicted_canton_1990 %>% 
  filter(year==1990)

sum(predicted_canton_1990_baseline$fit)*30


# 10.3 Model 3 -------


predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_3_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_3_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 


predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_3 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_1$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_highhigh, na.rm=T)

# percent averted
predicted_canton_1990_baseline <- predicted_canton_1990 %>% 
  filter(year==1990)

sum(predicted_canton_1990_baseline$fit)*30

# 10.4 Model 4 -------


predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_4_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_4_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 


predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_4 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_1$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_highhigh, na.rm=T)

# percent averted
predicted_canton_1990_baseline <- predicted_canton_1990 %>% 
  filter(year==1990)

sum(predicted_canton_1990_baseline$fit)*30

# 10.5 Model 5 -------


predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_5_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_5_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 


predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_5 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_5$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_5$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_5$averted_alri5_highhigh, na.rm=T)

# percent averted
predicted_canton_1990_baseline <- predicted_canton_1990 %>% 
  filter(year==1990)

sum(predicted_canton_1990_baseline$fit)*30

sum(predicted_canton_counterfactual$fit) # 68,972
sum(predicted_canton_1990$fit) # 37,149
sum(predicted_canton_same$fit) # 29,805



# 10.5 Make figure for health benefits --------

benefits_accrued_df <- predicted_canton_accrue_1 %>% 
  mutate(model = "Main model") %>% 
  bind_rows(predicted_canton_accrue_2 %>% 
              mutate(model = "Model 2")) %>% 
  bind_rows(predicted_canton_accrue_3 %>% 
              mutate(model = "Model 3")) %>% 
  bind_rows(predicted_canton_accrue_4 %>% 
              mutate(model = "Model 4")) %>% 
  bind_rows(predicted_canton_accrue_5 %>% 
              mutate(model = "Model 5"))

total_accrue <- benefits_accrued_df %>% 
  group_by(model) %>% 
  summarize(averted_alri5_total = sum(averted_alri5, na.rm=T),
            averted_alri5_low = sum(averted_alri5_lowlow, na.rm=T),
            averted_alri5_high = sum(averted_alri5_highhigh, na.rm=T))

period_accrue <- benefits_accrued_df %>% 
  group_by(model, period) %>% 
  summarize(averted_alri5_total = sum(averted_alri5_total, na.rm=T),
            averted_alri5_low = sum(averted_alri5_low, na.rm=T),
            averted_alri5_high = sum(averted_alri5_high, na.rm=T))

canton_accrue <- benefits_accrued_df %>% 
  group_by(model, canton_id_universal) %>% 
  summarize(averted_alri5_total = sum(averted_alri5_total, na.rm=T),
            averted_alri5_low = sum(averted_alri5_low, na.rm=T),
            averted_alri5_high = sum(averted_alri5_high, na.rm=T))

ecu_shp2_df_bind_accrue <- ecu_shp2_df_bind %>% 
  left_join(canton_accrue %>% 
              filter(model == "Main model"))

my_breaks <- c(0, 1, 5, 15, 25, 50, 100)

accrue_canton_map <- ggplot(data=ecu_shp2_df_bind_accrue) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=averted_alri5_total), colour = alpha("white", 1 / 2), size=.2) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 1) + 
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(0, 100),
                       oob=scales::squish) + 
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "Averted under-5 LRI mortalities") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "right",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

accrue_galapagos_map <- ggplot(data=ecu_shp2_df_bind_accrue %>% 
                                 mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                 filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=averted_alri5_total), colour = alpha("white", 1 / 2), size=.05) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(0, 100),
                       oob=scales::squish) + 
  # coord_fixed() + 
  labs(fill = "Averted under-5 LRI mortalities") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


accrue_map_full <- ggdraw() +
  draw_plot(accrue_canton_map, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(accrue_galapagos_map, x = 0.55, y = 0, width = 0.40, height = .35)


# cowplot::ggsave2("~/Dropbox/Ecuador National Health and LPG Analysis/Final_Repository/Figures/Results/canton_accrue_map.png",
#                  accrue_map_full,
#                  width = 175,
#                  height = 175,
#                  units = "mm",
#                  dpi = 200
# )



# 10.6 Empty model benefits ----------

alri5_gam_empty_mod

# new approach -------

predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_empty_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_empty_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_1 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_1$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_highhigh, na.rm=T)

# percent averted ------

predicted_canton_counterfactual <- predict(alri5_gam_empty_mod, newdata = 
                                             predict_canton_accrual_full_int_counterfactual, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_counterfactual %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

sum(predicted_canton_counterfactual$fit) # 68,972
sum(predicted_canton_1990$fit) # 37,145
sum(predicted_canton_same$fit) # 29,905



# FOR THE LINEAR MODELS --------

predict_canton_accrual <- matrix(NA, length(unique(full_df$canton_id_universal))*30, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 30)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(1990:2019, 169)) %>% 
  mutate(period = as.character(period))

predict_canton_accrual_full_int <-predict_canton_accrual %>% 
  left_join(full_df %>% 
              mutate(period = ifelse(period=="2015-2019", "2015", period)) %>% 
              dplyr::select(canton_id_universal, period,
                            under5population, cf10, cf_1990,
                            percent_rural_corrected_ratio, electricidad_no,
                            materials_index, elimina_servidas_red_pozo_letrina,
                            Literate_women, InSchool_girls, Idioma_indigena_self,
                            pcv3_use, vaccine_index, ANC_use, ANC_visits_median_use,
                            obtiene_agua_redpublica_tuberia_adentro_use), by=c("canton_id_universal", "period")) %>% 
  mutate(period = as.numeric(period)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(under5population = zoo::na.approx(under5population, na.rm=F),
         cf10 = zoo::na.approx(cf10, na.rm=F),
         cf_1990 = zoo::na.approx(cf_1990, na.rm=F),
         percent_rural_corrected_ratio = zoo::na.approx(percent_rural_corrected_ratio, na.rm=F),
         electricidad_no = zoo::na.approx(electricidad_no, na.rm=F),
         materials_index = zoo::na.approx(materials_index, na.rm=F),
         elimina_servidas_red_pozo_letrina = zoo::na.approx(elimina_servidas_red_pozo_letrina, na.rm=F),
         Literate_women = zoo::na.approx(Literate_women, na.rm=F),
         InSchool_girls = zoo::na.approx(InSchool_girls, na.rm=F),
         Idioma_indigena_self = zoo::na.approx(Idioma_indigena_self, na.rm=F),
         pcv3_use = zoo::na.approx(pcv3_use, na.rm=F),
         vaccine_index = zoo::na.approx(vaccine_index, na.rm=F),
         ANC_use = zoo::na.approx(ANC_use, na.rm=F),
         ANC_visits_median_use = zoo::na.approx(ANC_visits_median_use, na.rm=F),
         obtiene_agua_redpublica_tuberia_adentro_use = zoo::na.approx(obtiene_agua_redpublica_tuberia_adentro_use, na.rm=F)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(under5population = zoo::na.locf0(under5population),
         cf10 = zoo::na.locf0(cf10),
         cf_1990 = zoo::na.locf0(cf_1990),
         percent_rural_corrected_ratio = zoo::na.locf0(percent_rural_corrected_ratio),
         electricidad_no = zoo::na.locf0(electricidad_no),
         materials_index = zoo::na.locf0(materials_index),
         elimina_servidas_red_pozo_letrina = zoo::na.locf0(elimina_servidas_red_pozo_letrina),
         Literate_women = zoo::na.locf0(Literate_women),
         InSchool_girls = zoo::na.locf0(InSchool_girls),
         Idioma_indigena_self = zoo::na.locf0(Idioma_indigena_self),
         pcv3_use = zoo::na.locf0(pcv3_use),
         vaccine_index = zoo::na.locf0(vaccine_index),
         ANC_use = zoo::na.locf0(ANC_use),
         ANC_visits_median_use = zoo::na.locf0(ANC_visits_median_use),
         obtiene_agua_redpublica_tuberia_adentro_use = zoo::na.locf0(obtiene_agua_redpublica_tuberia_adentro_use))

predict_canton_accrual_full_int_1990 <- predict_canton_accrual_full_int %>% 
  dplyr::select(-cf10) %>% 
  rename(cf10 = cf_1990) %>%
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))

predict_canton_accrual_full_int1 <- predict_canton_accrual_full_int %>% 
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))

predict_canton_accrual_full_int1_half <- predict_canton_accrual_full_int %>% 
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))),
         cf10 = 0.5*cf10)


predict_canton_accrual_full_int_1990_half <- predict_canton_accrual_full_int %>% 
  dplyr::select(-cf10) %>% 
  mutate(cf10 = cf_1990*0.5) %>%
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))



# Counterfactual: no changes in %CF or covariates, present canton-year population --------

predict_canton_accrual_full_int_counterfactual <- predict_canton_accrual_full_int1 %>% 
  mutate(percent_rural_corrected_ratio_1990 = ifelse(period=="1990", percent_rural_corrected_ratio, NA),
         electricidad_no_1990 = ifelse(period=="1990", electricidad_no, NA),
         materials_index_1990 = ifelse(period=="1990", materials_index, NA),
         elimina_servidas_red_pozo_letrina_1990 = ifelse(period=="1990", elimina_servidas_red_pozo_letrina, NA),
         Literate_women_1990 = ifelse(period=="1990", Literate_women, NA),
         InSchool_girls_1990 = ifelse(period=="1990", InSchool_girls, NA),
         Idioma_indigena_self_1990 = ifelse(period=="1990", Idioma_indigena_self, NA),
         pcv3_use_1990 = ifelse(period=="1990", pcv3_use, NA),
         vaccine_index_1990 = ifelse(period=="1990", vaccine_index, NA),
         ANC_use_1990 = ifelse(period=="1990", ANC_use, NA),
         ANC_visits_median_use_1990 = ifelse(period=="1990", ANC_visits_median_use, NA),
         obtiene_agua_redpublica_tuberia_adentro_use_1990 = ifelse(period=="1990", obtiene_agua_redpublica_tuberia_adentro_use, NA)) %>% 
  mutate(percent_rural_corrected_ratio_1990 = min(percent_rural_corrected_ratio_1990, na.rm=T),
         electricidad_no_1990 = min(electricidad_no_1990, na.rm=T),
         materials_index_1990 = min(materials_index_1990, na.rm=T),
         elimina_servidas_red_pozo_letrina_1990 = min(elimina_servidas_red_pozo_letrina_1990, na.rm=T),
         Literate_women_1990 = min(Literate_women_1990, na.rm=T),
         InSchool_girls_1990 = min(InSchool_girls_1990, na.rm=T),
         Idioma_indigena_self_1990 = min(Idioma_indigena_self_1990, na.rm=T),
         pcv3_use_1990 = min(pcv3_use_1990, na.rm=T),
         vaccine_index_1990 = min(vaccine_index_1990, na.rm=T),
         ANC_use_1990 = min(ANC_use_1990, na.rm=T),
         ANC_visits_median_use_1990 = min(ANC_visits_median_use_1990, na.rm=T),
         obtiene_agua_redpublica_tuberia_adentro_use_1990 = min(obtiene_agua_redpublica_tuberia_adentro_use_1990, na.rm=T)) %>% 
  dplyr::select(-percent_rural_corrected_ratio, -cf10,
                -electricidad_no, -materials_index,
                -elimina_servidas_red_pozo_letrina, -Literate_women,
                -InSchool_girls, -Idioma_indigena_self,
                -pcv3_use, -vaccine_index,
                -ANC_use, -ANC_visits_median_use,
                -obtiene_agua_redpublica_tuberia_adentro_use) %>% 
  dplyr::rename(percent_rural_corrected_ratio = percent_rural_corrected_ratio_1990, 
                cf10 = cf_1990,
                electricidad_no = electricidad_no_1990, 
                materials_index = materials_index_1990,
                elimina_servidas_red_pozo_letrina = elimina_servidas_red_pozo_letrina_1990, 
                Literate_women = Literate_women_1990,
                InSchool_girls = InSchool_girls_1990, 
                Idioma_indigena_self = Idioma_indigena_self_1990,
                pcv3_use = pcv3_use_1990, 
                vaccine_index = vaccine_index_1990,
                ANC_use = ANC_use_1990, 
                ANC_visits_median_use = ANC_visits_median_use_1990,
                obtiene_agua_redpublica_tuberia_adentro_use = obtiene_agua_redpublica_tuberia_adentro_use_1990) %>% 
  dplyr::select(-period) %>% 
  mutate(period = "1990")


alri5_feqp_1_mod

# new approach -------

predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf10) %>% 
  rename(cf10 = cf_1990)

predicted_canton_same <- predict(alri5_feqp_1_mod, newdata = predict_canton_accrual_full_int1, type="response") %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf10)) 

predicted_canton_1990 <- predict(alri5_feqp_1_mod, newdata = predict_canton_accrual_full_int_1990, type="response") %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf10, year, ALRI5_5_plus)) 

predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, cf10, value) %>% 
  rename(predicted_alri5_same = value) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, value) %>% 
              rename(predicted_alri5_1990 = value), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990,
                predicted_alri5_same,
                everything()) 

predicted_canton_accrue_2 = predicted_canton_full %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same)

# summarizing health benefits -------

sum(predicted_canton_accrue_2$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_2$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_2$averted_alri5_highhigh, na.rm=T)

predicted_canton_accrue_2 %>% 
  mutate(accrual_period = ifelse(year>=1990 & year<=2001, "1990 to 2001",
                                 ifelse(year>2001 & year<=2010, "2001 to 2010",
                                        ifelse(year>2010, "2010 to 2019", NA)))) %>% 
  group_by(accrual_period) %>% 
  summarize(averted_int = sum(averted_alri5, na.rm=T))

predicted_canton_accrual_summary <-  predicted_canton_accrue_2 %>% 
  dplyr::group_by(canton_id_universal) %>% 
  dplyr::summarize(averted_int = sum(averted_alri5, na.rm=T))


# table(predicted_canton_accrual_summary$averted_int_low>0)

# percent averted
predicted_canton_1990_baseline <- predicted_canton_1990 %>% 
  filter(year==1990)

sum(predicted_canton_1990_baseline$value, na.rm=T)*30



